---
title: "03 Matching -- Antenatal"
author: "Ye Shen, Radhika Tampi, Raj Vatsa"
output: pdf_document
---

# Setup
```{r, message = FALSE, warning = FALSE}
knitr::opts_chunk$set(echo = TRUE)
if(!require(webshot)) {
  install.packages("webshot")
}
library(gt)
library(gtsummary)
library(tidyverse)
library(survey)
library(gridExtra)
library(grid)
library(foreign)
library(webshot)
library(MatchIt)
library(stringr)
#webshot::install_phantomjs()
```


```{r}
antenatal<-readRDS("data/cleaned_antenatal_did.rds")
```

## Coarsened Exact Matching: by quintile of wealth score [eval = F]
```{r matching}
## Match households by wealth score quintile: 

# 1. Matching Procedure: 
  # a) Only match on pre-treatment, so match households in 2014  based on wealth quintiles 
  # b) 1:1 match intervention to control

  # Coarsened Exact Matching & explanation of options:
  # k2k: If TRUE nearest neighbor matching without replacement will take place within each stratum, and any unmatched units will be dropped (e.g., if there are more treated than control units in the stratum, the treated units without a match will be dropped)
  # k2k.method: how the distance between units should be calculated if k2k = TRUE

df <- antenatal %>% 
  filter(year == 0) %>%
  matchit(formula = group ~ menage_score_bien_etre,
          method = "cem", cutpoints = list(menage_score_bien_etre = 5), 
          k2k = TRUE, k2k.method = "euclidean") 
summary(df)
df <- match.data(df)
  #euclidean ok because we are only matching on 1 var, no need to "weight" vars based on effect on dependent var
  #use 5 bins for quintiles (generated automatcially by MatchIt)

# 2. Add in data from matched households from 2016 to create total women antenatal dataset
women_match_antental <- antenatal %>%
  filter(year == 1) %>%
  subset(ID_menage %in% df$ID_menage) %>%
  mutate(subclass = df$subclass[match(ID_menage, df$ID_menage)]) %>%
  mutate(weights = 1) %>% ## change if we change the k2k option in MatchIt
  rbind(df) %>%
  arrange(subclass, year)

rm(df) # remove original matched dataframe from R

saveRDS(women_match_antental, "data/match_women_antenatal.rds")
```

## After matching, assign survey design to matched dataset
```{r}
svy_antenatal_match <- svydesign(
    id = ~cluster, strata = ~group, weights = ~wmweight,
    data = women_match_antental
  )
# Create subsets for each year
  svy_antenatal_match_2014 <- subset(svy_antenatal_match, year == 0)
  svy_antenatal_match_2016 <- subset(svy_antenatal_match, year == 1)
```

### DiD regression adjusting for wealth index score

 * Outcome: Any antenatal consultation
```{r}
#Adjusted DID regression on Any antenatal consultation
  did_antenatal_match <- svyglm(antenatal.publichealthcenter.consultation~year*group+menage_score_bien_etre, design = svy_antenatal_match, family = gaussian)
```

## Table 3 for pregnant women
 * Outcome: Any antenatal consultation
 
### Prepare Table 3
 

```{r}
table3_antenatal <- 
  as.data.frame(array(data = NA, dim = c(2, 19)))
# "Indicator","Study group", "Denominator", "Numerator", "Percent (SE)", "Difference 
# (p-value)","Denominator", "Numerator", "Percent (SE)", "Difference (p-value)","Percent 
# relative change", "Percent absolute change", "Difference in differences (p-value)"
colnames(table3_antenatal) <- 
  c("indicator", "group", "d_2014", "n_2014", "m_2014", "se_2014", "dif_2014", 
    "p-val_2014", "d_2016", "n_2016", "m_2016", "se_2016", "dif_2016", "p-val_2016",
    "trends_rel", "trends_abs",  "dif.in.dif(man)", "dif.in.dif(coef)", "p-val_did")

table3_antenatal$indicator <- "antenatal.publichealthcenter.consultation"

# Any visit (Access)
## Summary stats for 2014
table3_antenatal[rev(c(1,2)), 2] <- c(0,1)
table3_antenatal[rev(c(1,2)), 3] <- 
    round(svytable( ~group, design = svy_antenatal_match_2014)) # 'd_2014'
table3_antenatal[rev(c(1,2)), 4] <- 
    round(coef(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_match_2014, FUN = svytotal, keep.var = T, na.rm = T))) # 'n_2014'
table3_antenatal[rev(c(1,2)), 5] <- 
    round(coef(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_match_2014, FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'm_2014'
table3_antenatal[rev(c(1,2)), 6] <- 
    round(SE(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_match_2014, FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'se_2014'
table3_antenatal[rev(c(1,2)), 7] <- table3_antenatal[1, 5] - table3_antenatal[2, 5]# 'dif_2014'

### Estimate p-value of the 2014 difference with a Chi2 test
  if (table3_antenatal[1, 4] == 0 & 
      table3_antenatal[2, 4] == 0) {
    table3_antenatal[2, 8] <- NA
  } else {
    table3_antenatal[2, 8] <- 
      round(svychisq(~antenatal.publichealthcenter.consultation+group, design = svy_antenatal_match_2014)$p.value,2)
  }

## Summary stats for 2016
table3_antenatal[rev(c(1,2)), 9] <- 
    round(svytable( ~group, design = svy_antenatal_match_2016)) # 'd_2016'
table3_antenatal[rev(c(1,2)), 10] <- 
    round(coef(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_match_2016, FUN = svytotal,keep.var = T, na.rm = T))) # 'n_2016'
table3_antenatal[rev(c(1,2)), 11] <- 
    round(coef(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_match_2016, FUN = svymean, keep.var = T, na.rm = T))*100,2) # 'm_2016'
table3_antenatal[rev(c(1,2)), 12] <- 
    round(SE(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_match_2016, FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'se_2016'
table3_antenatal[rev(c(1,2)), 13] <- table3_antenatal[1, 11] - table3_antenatal[2, 11]# 'dif_2016'

### Estimate p-value of the 2016 difference with a Chi2 test
  if (table3_antenatal[1, 10] == 0 & 
      table3_antenatal[2, 10] == 0) {
    table3_antenatal[2, 14] <- NA
  } else {
    table3_antenatal[2, 14] <- 
      round(svychisq(~antenatal.publichealthcenter.consultation+group, design = svy_antenatal_match_2016)$p.value,2)
  }

## Estimate values for trends 2014-2016
  table3_antenatal[rev(c(1, 2)), 15] <- round((table3_antenatal[rev(c(1, 2)), 11] - 
       table3_antenatal[rev(c(1, 2)), 5]) / table3_antenatal[rev(c(1, 2)), 5]*100,2) #'trends_rel'
  table3_antenatal[rev(c(1, 2)), 16] <- table3_antenatal[rev(c(1, 2)), 11] - 
    table3_antenatal[rev(c(1, 2)), 5] #'trends_abs'
  table3_antenatal[2, 17] <- table3_antenatal[1, 16] - table3_antenatal[2, 16] #'dif.in.dif(man)'

## DiD stats 
  # 'dif.in.dif(coef)': Coefficient of Difference-in-Differences comparison 
  table3_antenatal[2, 18] <- round(summary(did_antenatal_match)$coefficients[5, 1]*100,2)
  # 'p-val_did': p-value of Difference-in-Differences comparison
  table3_antenatal[2, 19] <- round(summary(did_antenatal_match)$coefficients[5, 4],2)
```

### Final Table 3 Antenatal
```{r}
# Replicate final Table 2
final_table3_antenatal<-table3_antenatal
final_table3_antenatal$m_2014 <- sprintf("%.2f (%.2f)", final_table3_antenatal$m_2014, 
                                       final_table3_antenatal$se_2014)
final_table3_antenatal$m_2016 <- sprintf("%.2f (%.2f)", final_table3_antenatal$m_2016, 
                                       final_table3_antenatal$se_2016)
final_table3_antenatal$dif_2014 <- sprintf("%.2f (%.2f)", final_table3_antenatal$dif_2014, 
                                         final_table3_antenatal$`p-val_2014`)
final_table3_antenatal$dif_2016 <- sprintf("%.2f (%.2f)", final_table3_antenatal$dif_2016, 
                                         final_table3_antenatal$`p-val_2016`)
final_table3_antenatal$`dif.in.dif(coef)` <- sprintf("%.2f (%.2f)", 
                                                  final_table3_antenatal$`dif.in.dif(coef)`, 
                                                  final_table3_antenatal$`p-val_did`)
final_table3_antenatal$se_2014 <- NULL
final_table3_antenatal$se_2016 <- NULL
final_table3_antenatal$`p-val_2014` <- NULL
final_table3_antenatal$`p-val_2016` <- NULL
final_table3_antenatal$`p-val_did` <- NULL
final_table3_antenatal$dif_2014[grep("NA", final_table3_antenatal$dif_2014)] <- "--"
final_table3_antenatal$dif_2016[grep("NA", final_table3_antenatal$dif_2016)] <- "--"
final_table3_antenatal$`dif.in.dif(man)`<- NULL
final_table3_antenatal$`dif.in.dif(coef)`[grep("NA", final_table3_antenatal$`dif.in.dif(coef)`)] <- "--"

final_table3_antenatal <- as_tibble(final_table3_antenatal) %>% 
  select(-c(n_2014, n_2016, indicator)) %>%
  mutate(
    group = case_when(group == 1 ~ "IG", 
                      group == 0 ~ "NG"))
```

## Create Table 3: Antenatal
```{r}
gt(final_table3_antenatal) %>%
  tab_header(title = ("Table S2. Summary of responses and trends from the 2014 and 2016 IHOPE surveys for 1 or more consultations at a Public Health Facility among pregnant women (15-49 years old)")) %>%
  tab_style(style = cell_text(align = "left"), location = cells_title("title")) %>%
  tab_spanner(
    label = "2014",
    columns = c(d_2014, m_2014, dif_2014)
  ) %>%
  tab_spanner(
    label = "2016",
    columns = c(d_2016, m_2016, dif_2016)
  ) %>%
  tab_spanner(
    label = "2014-2016 Trend",
    columns = c(trends_rel, trends_abs,`dif.in.dif(coef)`)
  ) %>%
  tab_style(style = cell_text(weight = "bold"), 
            location = cells_column_spanners(spanners = everything())) %>%
  cols_label(group = "Study group", d_2014 = "N", m_2014 = "Percent (SE)", 
               dif_2014 = " Difference (p-value)", d_2016 = "N", 
               m_2016 = "Percent (SE)", dif_2016 = " Difference (p-value)", 
               trends_rel = "Percent relative change", 
               trends_abs = "Percent absolute change", 
             `dif.in.dif(coef)` = "Adjusted DiD (p-value)") %>%
  tab_style(style = cell_text(weight = "bold"), 
              location = cells_column_labels(columns = everything())) %>%
  gt::gtsave("table_fig/paper/Supplemental/table_s2_antenatal_match.png")
```


## Figure 3: Antenatal
```{r}
# Figure:
# Generate figure based on table
num_var <- 1

fig <- as.data.frame(matrix(nrow = num_var * 4, ncol = 6))

fig$V1 <- round(c(
  table3_antenatal$m_2014, table3_antenatal$m_2016), 2)
fig$V2 <- round(c(
  table3_antenatal$n_2014, table3_antenatal$n_2016), 0)
fig$V3 <- round(c(
  table3_antenatal$d_2014, table3_antenatal$d_2016), 0)
fig$V4 <- rep(c("Intervention", "Non-Intervention"), num_var * 2)
fig$V5 <- rep(c("2014 survey", "2016 survey"), each = num_var * 2)
fig$V6 <- rep("Antenatal Care-seeking", num_var * 4) ### RENAME BASED ON OUTCOME ###
colnames(fig) <- c("mean", "num", "dem", "group", "year", "indicator")
fig$group <- factor(fig$group, levels = c("Non-Intervention", "Intervention"))

figure <- 
ggplot(fig, 
         aes(x = mean, y = group, group = interaction(group, indicator), 
             label = sprintf("%s/%s", num, dem))) +
  geom_point(aes(colour = year), size = 3.5) +
  scale_colour_manual(breaks = fig$year, 
                      values = c("2014 survey" = "black", "2016 survey" = "red")) +
  geom_line() +
  labs(
    x = "Percentage", y = "",
    title = "Matching on Household Wealth Index" 
  ) +
  theme_bw() +
  scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)) +
  scale_y_discrete(position = "right") +
  theme(
    legend.position = "bottom", axis.text = element_text(size = 10), 
    panel.grid.minor = element_blank(), legend.title = element_blank(),
    title = element_text(size = 12, face = "bold"), 
    axis.title = element_text(size = 13),
    legend.text = element_text(size = 12)
  )+
  annotate("text", label="{", x=50, y=1.5, size =20, fontface="plain")+
  annotate("text", label=paste0("Adjusted DiD (p-val): \n",final_table3_antenatal$ `dif.in.dif(coef)`[2]), x=35, y=1.5)

saveRDS(figure,"table_fig/antenatal/fig_ante_match.rds")
ggsave("table_fig/antenatal/fig3_match.png")
```



# Assesing Balance Before and After Matching
```{r}
antenatal_notmatch <- readRDS("data/cleaned_antenatal_did.rds") %>%
    mutate(group_year=as.factor(case_when(
    group==1&year==0 ~ 1, #Treatment in pre-period: Group 1 in Stuart et al 2014
    group==1&year==1 ~ 2, #Treatment in post-period: Group 2 in Stuart et al 2014
    group==0&year==0 ~ 3, #Control in pre-period: Group 3 in Stuart et al 2014
    group==0&year==1 ~ 4 #Control in post-period: Group 4 in Stuart et al 2014
  ))) %>%
  mutate(match = 0) 

antenatal_match<-readRDS("data/match_women_antenatal.rds")%>%
  mutate(group_year=as.factor(case_when(
    group==1&year==0 ~ 1, #Treatment in pre-period: Group 1 in Stuart et al 2014
    group==1&year==1 ~ 2, #Treatment in post-period: Group 2 in Stuart et al 2014
    group==0&year==0 ~ 3, #Control in pre-period: Group 3 in Stuart et al 2014
    group==0&year==1 ~ 4 #Control in post-period: Group 4 in Stuart et al 2014
  ))) %>%
  mutate(match = 1) %>%
  select(-c(subclass, weights))

antenatal_full <- rbind(antenatal_match, antenatal_notmatch)
antenatal_full$match <- factor(antenatal_full$match, levels = c("0", "1"),
                  labels = c("Unmatched", "Matched")
                  )

antenatal_full %>%
  ggplot() +
  geom_density(aes(menage_score_bien_etre, fill = group_year), alpha = 0.4, adjust = 1.5) +
  labs(x = "Wealth Index Score", title = "Fig. S3 Balance Before and After Matching For Households with Pregnant Women", fill = "Treatment and Period") +
  scale_fill_discrete(labels=c("1"="Treatment Preperiod","2"="Treatment Postperiod","3"="Control Preperiod","4"="Control Postperiod")) +
  theme_bw()+
  theme(plot.title = element_text(size=8), 
        axis.line = element_line(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank()) +
  facet_wrap(~match)
ggsave("table_fig/paper/Supplemental/fig_s3_matching_balance_antenatal.png", width=8, height=4, dpi=300)
```
